pred_diet_dist <- data.frame(pred_species=c("Herring", "Sprat", "Cod", "Haddock", "Whiting", "Blue Whiting", "Norway Pout", "Poor Cod", "European Hake", "Monkfish", "Horse Mackerel", "Mackerel", "Common Dab", "Plaice", "Megrim", "Sole", "Boarfish"))
prey_type <- c("benthos", "fish", "nekton", "other", "zooplankton")
pred_diet_dist[ , prey_type] <- NA
#use below for loop to fill in this data frame
#then, use facet_wrap() to plot multiple pie charts from it
#species_list includes Boarfish which doesn't have values, so add 'length()-1' to account for this - i.e. doesn't analyse any Boarfish data because it's nonexistent
prey_type_list <- list("benthos", "fish", "nekton", "other", "zooplankton")
for (i in 1:(length(species_list)-1)){
b=f=n=o=z=0
first_species <- renamed_df %>% filter(renamed_df$pred_species == fixed(species_list[i]))
benthos <- first_species[first_species$prey_type == fixed("benthos"),]
b <- length(benthos$haul_id)
fish <- first_species[first_species$prey_type == fixed("fish"),]
f <- length(fish$haul_id)
nekton <- first_species[first_species$prey_type == fixed("nekton"),]
n <- length(nekton$haul_id)
other <- first_species[first_species$prey_type == fixed("other"),]
o <- length(other$haul_id)
zoo <- first_species[first_species$prey_type == fixed("zooplankton"),]
z <- length(zoo$haul_id)
pie(c(b,f,n,o,z), prey_type_list, main = species_list[i])
}
#ggplot(renamed_df, aes(x="", y="abc", fill=prey_type)) +
# geom_bar(stat = "identity", width=1, position=position_fill()) +
# coord_polar(theta="y") +
# facet_wrap (.~renamed_df$pred_species, scale="free_y")
These are individual pie charts showing the distribution of the type of prey each predator eats.
#Separated into individual plots for each predator species -> using facet_wrap for the variable (pred_species)
ggplot(data = renamed_df, aes(x=log(ppmr)), group=1) +
labs(title = "log(PPMR) v. biomass density of prey", x="log(PPMR)", y="Biomass density of prey") +
facet_wrap(~renamed_df$pred_species, scale="free_y") +
theme(strip.text = element_text(size = 5)) +
geom_density(aes(weight = prey_weight_g), colour="red")
#poor cod and boarfish?
for (i in 1:length(species_list)){
name <- species_list[i]
first_species <- renamed_df %>%
filter(pred_species == fixed(name))
ggplot(data = first_species, aes(x=log(ppmr)), group=1) +
labs(title = name, x="log(PPMR)", y="biomass density of prey") +
geom_density(aes(weight = prey_weight_g), colour="red") +
theme(plot.title = element_text(size=15))
av <- weighted.mean(first_species$ppmr, w = first_species$prey_weight_g, na.rm = TRUE)
stan_dev <- sqrt(wtd.var(first_species$ppmr, w = first_species$prey_weight_g, na.rm = TRUE))
#make standard deviation weighted by prey biomass
print(paste(name, av, stan_dev))
}
## [1] "Herring 19709.823009688 241475.080343766"
## [1] "Sprat 195.609172891389 4253.36606797894"
## [1] "Cod 497.290047167729 6772.41910772189"
## [1] "Haddock 3022.97716139834 21797.4455834206"
## [1] "Whiting 1264.39654448219 24534.9527990708"
## [1] "Blue Whiting 2010.93278075909 17154.1672149982"
## [1] "Norway Pout 10973.8360655567 26164.1899818578"
## [1] "Poor Cod 517.625942013013 2734.01380637647"
## [1] "European Hake 76.7901806692788 310.238947982841"
## [1] "Monkfish 62.9178507723842 100.903624078094"
## [1] "Horse Mackerel 30796.6346791468 308842.203990889"
## [1] "Mackerel 205801.946353935 817171.776698264"
## [1] "Common Dab 128.357530231156 781.069519499324"
## [1] "Plaice 587.735382021861 2347.38746472048"
## [1] "Megrim 64.4484436302096 165.770999136965"
## [1] "Sole 274.421031981985 2453.27892002065"
## [1] "Boarfish NaN NA"
Looking for the most common PPMR for each individual species.
A graph of the weighted ppmr for each species against the biomass density of the prey. Prints the mean ppmr, as weighted by prey biomass.
#Separated into individual plots for each predator species -> using facet_wrap for the variable (pred_species)
ggplot(data = renamed_df, aes(x=log(ppmr)), group=1) +
labs(title = "Scatter plot: log(PPMR) v. number density of prey", x="log(PPMR)", y="Number density of prey") +
facet_wrap(~renamed_df$pred_species, scale="free_y") +
theme(strip.text = element_text(size = 5)) +
geom_density(aes(weight = no._prey_per_stmch), colour="red")
for (i in 1:length(species_list)){
name <- species_list[i]
first_species <- renamed_df %>%
filter(pred_species == fixed(name))
ggplot(data = first_species, aes(x=log(ppmr)), group=1) +
labs(title = name, x="log(PPMR)", y="Number density of prey") +
geom_density(aes(weight = no._prey_per_stmch), colour="red") +
theme(plot.title = element_text(size=15))
av <- weighted.mean(first_species$ppmr, w = first_species$no._prey_per_stmch, na.rm = TRUE)
stan_dev <- sqrt(wtd.var(first_species$ppmr, w = first_species$no._prey_per_stmch, na.rm = TRUE))
print(paste(name, av, stan_dev))
}
## [1] "Herring 4920445.2411513 7365532.72173494"
## [1] "Sprat 22863.8134490306 63519.8543048881"
## [1] "Cod 37331.0819181898 3862607.45841392"
## [1] "Haddock 143561.249495721 256951.107690553"
## [1] "Whiting 231137.0252423 452944.952135886"
## [1] "Blue Whiting 87884.0802338307 193515.083373986"
## [1] "Norway Pout 41782.5762445494 55159.3033736074"
## [1] "Poor Cod 17279.5676618823 20349.7295229756"
## [1] "European Hake 296.484038403308 2155.3411177515"
## [1] "Monkfish 214.778052908268 1238.10824158785"
## [1] "Horse Mackerel 1656066.03448337 5990392.43827448"
## [1] "Mackerel 7498547.2651935 14983797.9019753"
## [1] "Common Dab 4019.29968530871 23239.7703715824"
## [1] "Plaice 27238.3526014258 136213.922484656"
## [1] "Megrim 634.567952744918 2245.62067618099"
## [1] "Sole 32524.9032981856 300306.499870647"
## [1] "Boarfish NaN NA"
Looking for the most common PPMR for each individual species.
A graph of the weighted ppmr for each species against the number density of the prey. Prints the mean ppmr, as weighted by number of prey.
first_species <- renamed_df %>% filter(pred_species == fixed("Herring"))
#separate data set of a single species
renamed_df = renamed_df %>% mutate(lppmr = log(ppmr))
#adding a column of log(ppmr) values
herringDF <- renamed_df %>%
filter(pred_species == fixed("Herring"))
herringmean = mean(herringDF$lppmr)
herringSD = sqrt(var(herringDF$lppmr))
#these are non-weighted means
#var is variance, i.e. ave. distance from each point to the mean
#mean is the arithmetic mean of log(ppmr)
bio_herringmean = weighted.mean(herringDF$lppmr, w = herringDF$prey_weight_g, na.rm = TRUE)
bio_herringSD = sqrt(wtd.var(herringDF$lppmr, w = herringDF$prey_weight_g, na.rm = TRUE))
#mean and standard deviation, weighted by the prey biomass
ggplot(data = first_species, aes(x=log(ppmr)), group=1) +
labs(title = "log(ppmr) against biomass density for Herring, weighted by prey biomass
(with a normal distribution, weighted by prey biomass)", x="log(PPMR)",y="Biomass density of observations") +
geom_density(aes(weight = prey_weight_g), colour="red") +
theme(plot.title = element_text(size=15)) +
stat_function(fun = dnorm, args= with(herringDF, c(mean = bio_herringmean, sd = bio_herringSD)))
no_herringmean = weighted.mean(herringDF$lppmr, w = herringDF$no._prey_per_stmch, na.rm = TRUE)
no_herringSD = sqrt(wtd.var(herringDF$lppmr, w = herringDF$no._prey_per_stmch, na.rm = TRUE))
#weighted standard deviation found using the weighted variance
#mean and standard deviation, weighted by the number of prey
ggplot(data = first_species, aes(x=log(ppmr)), group=1) +
labs(title = "log(ppmr) against number density for Herring, weighted by no. of prey
(with a normal distribution, weighted by prey biomass)", x="log(PPMR)", y="No. density of observations") +
geom_density(aes(weight = no._prey_per_stmch), colour="red") +
theme(plot.title = element_text(size=15)) +
stat_function(fun = dnorm, args= with(herringDF, c(mean = no_herringmean, sd = no_herringSD)))
#combined graph of weighted by biomass and by number density
ggplot(data = first_species, aes(x=log(ppmr)), group=1) +
labs(title = "log(ppmr) against biomass/number density for Herring,
weighted by no. of prey", x="log(PPMR)", y="Number/biomass density of observations") +
geom_density(aes(weight = no._prey_per_stmch), colour="red") +
geom_density(aes(weight = prey_weight_g), colour="blue") +
theme(plot.title = element_text(size=15))
#How to add key for the lines??
Plotting the distribution of Herring log(PPMR) as weighted by prey biomass and number of prey (with weighted normal distribution curves plotted over the top of each).
Prey biomass weighted mean: 6.629177 No. of prey weighted mean: 13.54102
The mean is shifted by 6.911843.
What does this mean?
ggplot(data = renamed_df, aes(log(indiv_prey_weight), no._prey_per_stmch)) +
labs(title = "log(prey weight) v. number of prey per predator stomach", x="log(prey weight)", y="No. of prey per predator stomach") +
geom_point(size=0.5)
#Some interesting results --> introduce colours to show ships
renamed_df$'haul_id_short' <- gsub("\\-.*", "", renamed_df$'haul_id')
renamed_df$'haul_id_short' <- gsub("_", "", renamed_df$'haul_id_short')
#rename haul_id values -> separate by ship names (e.g. CLYDE) rather than complete id (e.g. CLYDE-1935-6)
haul_list <- unique(renamed_df$haul_id_short)
interesting_haul <- filter(renamed_df, haul_id_short=='CLYDE'|haul_id_short=='END04'|haul_id_short=='CORY08'|haul_id_short=='LUC'|haul_id_short=='HIDDINK'|haul_id_short=='EXCmacDATSTO815'|haul_id_short=="Excmacdatsto815error")
haul_list_interesting <- unique(interesting_haul$haul_id_short)
#Vertical lines: CLYDE, BLEGVARD, CIROL05, END03, LAST, LUC (*), DUBUIT, HARDY, JOHNSON
#Curves: END04
#Horizontal lines: CORY08, HIDDINK
#Identical: EXCmacDATSTO815, Excmacdatsto815error
ggplot (data = interesting_haul, aes(x=log(indiv_prey_weight), y=no._prey_per_stmch)) +
labs(title = "log(prey weight) v. number of prey per stomach - separated by ship names", x="log(prey weight)", y="No. prey per stomach") +
geom_point(size=.1, colour="red") +
theme(strip.text = element_text(size = 5)) + facet_wrap(~interesting_haul$haul_id_short, scale="free_y")
#create new facet_wrap() over: vertical lines, curves, horizontal lines, identical
Playing around with data to see any specific correlations; what is the distribution of the weight of prey recorded.
Also, I found that the EXCmacDATSTO815 and Excmacdatsto815error gave exactly the same data (which might need to be considered in later calculations).
ggplot (data = renamed_df, aes(indiv_prey_weight, pred_weight_g)) +
geom_point(size=0.5) +
labs(title = "Predator v. prey mass plot", x="Prey mass (g)", y="Predator mass (g)")
#mass since measured in g
ggplot(data = renamed_df, aes(log(indiv_prey_weight), log(pred_weight_g))) +
geom_point(size=0.5) +
labs(title = "log(Predator mass) v. log(prey mass) plot", x="log(Prey mass)", y="log(Predator mass)") +
stat_smooth (method='lm', se=FALSE)
## `geom_smooth()` using formula 'y ~ x'
#slope <- coef(lm(log(renamed_df$pred_weight_g)~log(renamed_df$indiv_prey_weight)))
#print(paste("slope of the log(pred) v. log(prey) line of best fit:", slope))
#second part is intercept -> how to separate?
ggplot(data = renamed_df, aes(log(indiv_prey_weight), log(pred_weight_g))) +
labs(title = "log(pred. mass) v. log(prey mass) separated by predator species", x="log(prey mass)", y="log(pred. mass)") +
geom_point(size=0.2, colour="red") +
facet_wrap(~pred_species, scale="free_y") +
theme(strip.text = element_text(size = 10))
ggplot(data=renamed_df, aes(log(pred_weight_g), log(ppmr))) +
geom_point(size=0.5) +
labs(title = "log(pred mass) v. log(ppmr) plot", x="log(Pred mass)", y="log(PPMR)") +
stat_smooth (method='lm', se=FALSE)
## `geom_smooth()` using formula 'y ~ x'
slope2 <- coef(lm(log(renamed_df$ppmr)~log(renamed_df$pred_weight_g)))
print(paste("slope of the log(ppmr) v. log(pred_weight) line of best fit:", slope2))
## [1] "slope of the log(ppmr) v. log(pred_weight) line of best fit: 5.18652611475397"
## [2] "slope of the log(ppmr) v. log(pred_weight) line of best fit: 0.313443733574052"
#slope of the above plot
ggplot(data=renamed_df, aes(log(pred_weight_g), log(ppmr))) +
geom_point(size=0.5) +
labs(title = "log(pred mass) v. log(ppmr) plot", x="log(Pred mass)", y="log(PPMR)") + stat_smooth (method='lm', se=FALSE) +
facet_wrap(~pred_species, scale="free_y") +
stat_smooth(method='lm', se=FALSE)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
species_df <- renamed_df %>% filter(pred_species == fixed("Poor Cod"))
ggplot(data=species_df, aes(log(pred_weight_g), log(ppmr))) +
geom_point(size=0.5) +
labs(title = "log(pred mass) v. log(ppmr) plot: Poor Cod", x="log(Pred mass)", y="log(PPMR)") + stat_smooth (method='lm', se=FALSE) +
facet_wrap(~pred_species, scale="free_y") +
stat_smooth(method='lm', se=FALSE)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
species_slope <- coef(lm(log(species_df$ppmr)~log(species_df$pred_weight_g)))
print(paste("slope of the log(ppmr) v. log(pred_weight) line of best fit:", species_slope))
## [1] "slope of the log(ppmr) v. log(pred_weight) line of best fit: 6.20634582898381"
## [2] "slope of the log(ppmr) v. log(pred_weight) line of best fit: -0.0895529388001952"
#for (i in 1:length(species_list)){
# name <- species_list[i]
#first_species <- renamed_df %>% filter(pred_species == fixed(name))
#grad <- coef(lm(log(first_species$ppmr)~log(first_species$pred_weight_g)))
#print(paste(name, grad))
#}
log(pred mass) v. log(ppmr) -> is pred. mass related to ppmr?
We want them to not be proportional (i.e. slope = 0).
CHECK: IS THIS NO. OF POINTS RECORDED OR NO. POINTS*NO. PREY PER STOMACH
lmer(log(ppmr) ~ pred_species + prey_weight_g + (1|haul_id_short), data = renamed_df)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(ppmr) ~ pred_species + prey_weight_g + (1 | haul_id_short)
## Data: renamed_df
## REML criterion at convergence: 1118461
## Random effects:
## Groups Name Std.Dev.
## haul_id_short (Intercept) 1.663
## Residual 1.956
## Number of obs: 267431, groups: haul_id_short, 110
## Fixed Effects:
## (Intercept) pred_speciesCod
## 5.108989 1.413988
## pred_speciesCommon Dab pred_speciesEuropean Hake
## 0.981545 -0.577597
## pred_speciesHaddock pred_speciesHerring
## 1.916203 1.617905
## pred_speciesHorse Mackerel pred_speciesMackerel
## 1.930489 1.970072
## pred_speciesMegrim pred_speciesMonkfish
## -0.120421 -0.278221
## pred_speciesNorway Pout pred_speciesPlaice
## 2.074157 2.128633
## pred_speciesPoor Cod pred_speciesSole
## 0.648481 1.741491
## pred_speciesSprat pred_speciesWhiting
## 0.419409 0.811182
## prey_weight_g
## -0.003506
lmer(log(ppmr) ~ pred_species + prey_weight_g + (1|haul_id_short) + (1|year) + (1|no._prey_per_stmch) + (1|ices_rectangle), data = renamed_df)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(ppmr) ~ pred_species + prey_weight_g + (1 | haul_id_short) +
## (1 | year) + (1 | no._prey_per_stmch) + (1 | ices_rectangle)
## Data: renamed_df
## REML criterion at convergence: 1023959
## Random effects:
## Groups Name Std.Dev.
## no._prey_per_stmch (Intercept) 1.7994
## ices_rectangle (Intercept) 0.6634
## year (Intercept) 0.6564
## haul_id_short (Intercept) 1.6903
## Residual 1.7773
## Number of obs: 254595, groups:
## no._prey_per_stmch, 4507; ices_rectangle, 718; year, 97; haul_id_short, 97
## Fixed Effects:
## (Intercept) pred_speciesCod
## 7.354137 1.336221
## pred_speciesCommon Dab pred_speciesEuropean Hake
## 0.500668 -0.098397
## pred_speciesHaddock pred_speciesHerring
## 1.582278 1.008104
## pred_speciesHorse Mackerel pred_speciesMackerel
## 1.393272 1.341490
## pred_speciesMegrim pred_speciesMonkfish
## 0.432421 -0.132208
## pred_speciesNorway Pout pred_speciesPlaice
## 1.510116 1.853671
## pred_speciesPoor Cod pred_speciesSole
## 0.512387 1.706721
## pred_speciesSprat pred_speciesWhiting
## 0.030321 0.870806
## prey_weight_g
## -0.004578
## optimizer (nloptwrap) convergence code: 0 (OK) ; 0 optimizer warnings; 1 lme4 warnings
#lmer(fixed ~ (1|random) + linear)
Fixed effect: log(ppmr)
Random effects: haul_id_short, year
Linear fixed effects: pred_species, prey_weight_g